---
title: "Vintage"
author: "Yanran"
date: "02/19/2023"
output: html_document
---

```{r library}
library(tidyverse)
library(tidycensus)
library(tigris)
library(sp)
library(stringr)
library(haven)
library(reshape2)
library(rstudioapi)
library(dplyr)
library(RColorBrewer)
library(purrr)
```



```{r}
## Read the NHGIS tract-level file and keep only the rows with state code 13 ##
CT0825 <- read.csv(file = 'nhgis_ppdd_20220825_p_tract.csv')
CT.ga <- filter(CT0825, state == "13")

## Lookup table: race label -> 3-character variable prefix ##
racemg <- data.frame(
  'race' = c('total', 'white', 'black', 'am_indian', 'asian', 'pac_islander', 'hispanic'),
  'racevar' = c('H76', 'H9I', 'H9B', 'H9C', 'H9D', 'H9E', 'H9H')
)

## Zero-padded 3-digit suffixes 003-019 and 027-043 ##
nums65 <- str_pad(c(3:19, 27:43), width = 3, side = 'left', pad = '0')

## Full column names: every prefix crossed with every suffix, plus "_dp" ##
## (all suffixes for the first prefix, then all for the second, and so on) ##
vnames <- paste0(rep(racemg$racevar, each = length(nums65)), nums65, "_dp")

```



```{r}
## Build tract-level counts by race, age group and sex from `CT.ga`, ##
## then save the result as `ga_dp0825`. Uses `CT.ga`, `vnames` and   ##
## `racemg` created in the previous chunk.                           ##

## create fips code ##
## Keeps gisjoin characters 2-3, 5-7 and 9-end (positions 1, 4 and 8 are dropped).
## Presumably these are the state, county and tract codes -- confirm against the
## NHGIS codebook. The result is stored as numeric, so a leading zero would be lost.
CT.ga$GEOID <- as.numeric(paste0(
  substr(CT.ga$gisjoin, 2, 3),
  substr(CT.ga$gisjoin, 5, 7),
  substr(CT.ga$gisjoin, 9, nchar(CT.ga$gisjoin))
))

## subset to only variables we will use for denominator ##
CT.ga <- CT.ga[, c("GEOID", vnames)]

## put in long form and organize ##
CT.ga <- melt(
  data = CT.ga, id.vars = "GEOID", measure.vars = vnames,
  factorsAsStrings = TRUE
)
CT.ga$variable <- as.character(CT.ga$variable)

## racevar is a variable that tells us what racegroup the row represents ##
CT.ga$racevar <- substr(CT.ga$variable, 1, 3)
## vnum is a variable that tells us the sex and age group the row represents ##
## (characters 5-6 of the column name, i.e. the last two digits of the 3-digit suffix) ##
CT.ga$vnum <- as.numeric(substr(CT.ga$variable, 5, 6))

## age ranges (the consistent groupings used downstream) ##
agecat <- paste0(
  "Age",
  c("0-4", "5-9", "10-14", "15-17", "18-19", "20-24",
    "25-29", "30-34", "35-44", "45-54", "55-64")
)

## maps each of the 17 fine age bands onto an index into agecat ##
realign_fineage <- c(1:5, 6, 6, 6, 7, 8, 9, 9, 10, 10, 11, 11, 11)

## Explicit vnum -> (age index, sex) lookup: vnum 3-19 are 'm', 27-43 are 'f'. ##
## Keyed on vnum so the assignment does not depend on row order or row count. ##
sexage <- data.frame(
  vnum = c(3:19, 27:43),
  age = rep(realign_fineage, times = 2),
  sex = rep(c("m", "f"), each = length(realign_fineage)),
  stringsAsFactors = FALSE
)
## fail loudly if the data holds a vnum the lookup does not cover ##
stopifnot(all(CT.ga$vnum %in% sexage$vnum))

## create a dataset with proper race and sex variables and an age group variable ##
## with consistent groupings (called rxsxa) based on the racevar and vnum variables ##
rxsxa <- unique(CT.ga[, c("racevar", "vnum")])
rxsxa <- merge(rxsxa, sexage, by = "vnum")
rxsxa <- merge(
  rxsxa,
  data.frame("age" = seq_along(agecat), "agecat" = agecat),
  by = "age"
)
rxsxa <- merge(rxsxa, racemg, by = "racevar")

## attach race / agecat / sex to every long-form row; merge() is an inner join, ##
## so check that no row was dropped or duplicated ##
n_long <- nrow(CT.ga)
CT.ga <- merge(CT.ga, rxsxa, by = c("racevar", "vnum"))
stopifnot(nrow(CT.ga) == n_long)

## sum the counts of the fine age bands that share an agecat ##
CT.ga <- aggregate(
  value ~ GEOID + race + agecat + sex,
  data = CT.ga,
  FUN = sum, na.rm = TRUE
)
## rename the summed column by name rather than by position ##
names(CT.ga)[names(CT.ga) == "value"] <- "dp0825_pop"

ga_dp0825 <- CT.ga
## written to the current working directory ##
save(ga_dp0825, file = "ga_dp0825_data.RData")

## wipe the workspace so only the objects loaded below take part in the merge ##
rm(list = ls())

#######################################
## 4. merge ACS, census, and dp data ##
#######################################

## read each processed dataset ##
## NOTE(review): ga_dp0825_data.RData is loaded from an absolute path here, while
## the chunk above saves it to the working directory -- confirm these are the same file.
for (rdata_path in c(
  '~/Desktop/Research/Rachel/CT_race_diff_privacy/GA/ga_acs_data.RData',    # presumably ga_acs
  '~/Desktop/Research/Rachel/CT_race_diff_privacy/GA/ga_ce_data.RData',     # presumably ga_ce
  '~/Desktop/Research/Rachel/CT_race_diff_privacy/GA/ga_dp_data.RData',     # ga_dp
  '~/Desktop/Research/Rachel/CT_race_diff_privacy/GA/ga_dp0527_data.RData', # CT.ga
  '~/Desktop/Research/Rachel/CT_race_diff_privacy/GA/ga_dp22_data.RData',   # dat.dp
  '~/Desktop/Research/Rachel/CT_race_diff_privacy/GA/ga_dp0825_data.RData'  # ga_dp0825
)) {
  load(rdata_path)
}

## merge them by fips code, race, age, and sex ##
## Left-to-right fold of pairwise merges, in the order: ##
## ga_acs, ga_ce, ga_dp, CT.ga, dat.dp, ga_dp0825 ##
join_keys <- c('GEOID', 'race', 'agecat', 'sex')
adat <- Reduce(
  function(left, right) merge(left, right, by = join_keys),
  list(ga_acs, ga_ce, ga_dp, CT.ga, dat.dp, ga_dp0825)
)

## persist the merged table, then restart from a workspace holding only what was saved ##
save(adat, file = 'ga_merged_denom_cov_data5.RData')
rm(list = ls())

load('ga_merged_denom_cov_data5.RData')
```




